# Run summary stats for foot traffic analysis


# Setup ----
options(scipen = 999)
library(caret)
library(tidyverse)
library(xtable)


load("/02_Data/FL_visitor_1804_1910_tract.RData") # NOTE: Proprietary data, values of all variables have been replaced with * 
load("/02_Data/tract_flood_FL.RData")             # NOTE: Proprietary data, values of all variables have been replaced with * 
load("/02_Data/NFIP_Michael_Claim.RData")
load("/02_Data/tract2017.RData")
load("/02_Data/NFIPTakeup.RData")
load("/02_Data/FloodZone.RData")
load("/02_Data/SavingsAccount.RData")
load("/02_Data/SCI_v3.RData")



# tract-level data as of 2017 --------------------------------------------------
tract_data = tract2017 %>%
  left_join(tract_flood_FL, by=c("GEOID"="FIPS_tract")) %>%
  left_join(NFIPTakeup, by="GEOID") %>%
  left_join(FloodZone, by="GEOID") %>%
  left_join(SavingsAccount, by="GEOID") %>%
  left_join(SCI_v3, by=c("GEOID"="tract")) %>%
  mutate(FloodDepth_ft_tract_dummy = ifelse(FloodDepth_ft_tract>0, 1, 0))



# tract-business-time level data, April 2018 to October 2019 -------------------
FL_visitor_1804_1910 = FL_visitor_1804_1910_tract %>%
  left_join(NFIP_Michael_Claim, by=c("visitor_home_tract"="CensusTract", "month")) %>%
  left_join(tract_data, by=c("visitor_home_tract"="GEOID")) %>%
  mutate(
    FloodDepth_ft_tract_dummy = ifelse(FloodDepth_ft_tract>0, 1, 0),
    ClaimAmount               = ifelse(is.na(ClaimAmount), 0, ClaimAmount),
    WAvgPaidWeek              = ifelse(is.na(WAvgPaidWeek), 0, WAvgPaidWeek),
    month                     = factor(month)
         )

date.binarized = predict(dummyVars(formula = "~ month", FL_visitor_1804_1910, sep = '_'), FL_visitor_1804_1910)        # the "dummyVars" formula in the "caret" package can be used to binarize the categotical variable automatically
colnames(date.binarized) = substr(colnames(date.binarized)[1:length(colnames(date.binarized))], 7, nchar(colnames(date.binarized)[1:length(colnames(date.binarized))]))

FL_visitor_1804_1910 = cbind.data.frame(FL_visitor_1804_1910, date.binarized)
rm(date.binarized)





# tract-business-time level data for Sep. 2018 ---------------------------------
FL_visitor_1809 = FL_visitor_1804_1910[FL_visitor_1804_1910$`18-09`==1,]

# business level data ----------------------------------------------------------
FL_business = FL_visitor_1809 %>%
  group_by(visitor_home_tract_placekey) %>%
  filter(!duplicated(placekey)) %>% 
  data.frame()




################################################################################
# Table 2: Summary Statistics for Foot Traffic Analysis
################################################################################


# -----------------------------------------------------------
# Summary stats function ------------------------------------
# -----------------------------------------------------------
sum_stats = function(var1, dataname){                                           
  # Summary stats table to be passed to dplyr functions
  # dataname is a data frame
  # y is a column in that data frame
  var1 = enquo(var1)
  
  b0 = dataname %>% select(!! var1)
  
  b1  = dataname %>% filter(FloodDepth_ft_tract_dummy==0 & !is.na(!!var1) & !is.infinite(!!var1)) %>% select(!!var1)
  b2  = dataname %>% filter(FloodDepth_ft_tract_dummy==1 & !is.na(!!var1) & !is.infinite(!!var1)) %>% select(!!var1)

  a0 <- b0 %>% 
    summarise(Mean=format(round(mean(!!var1, na.rm = TRUE), 2), nsmall = 2), 
              Median=format(round(median(!!var1, na.rm = TRUE), 2), nsmall = 2),
              SD=format(round(sd(!!var1, na.rm = TRUE), 2), nsmall = 2)) %>% 
    data.frame()
  
  a0_i <- b1 %>% 
    summarise(NoFlood=format(round(mean(!!var1, na.rm = TRUE), 2), nsmall = 2)) %>% 
    data.frame()
  
  a0_ii <- b2 %>% 
    summarise(Flooded=format(round(mean(!!var1, na.rm = TRUE), 2), nsmall = 2)) %>% 
    data.frame()
  
  a3 <- cbind.data.frame(a0, "", a0_i, a0_ii)
  rownames(a3) <- colnames(b0)[1]
  
  
  return(a3)
}





s1 = sum_stats(visitor_count_home_tract,                    FL_visitor_1809)

s2 = sum_stats(FloodDepth_ft_tract,                         tract_data)
s3 = sum_stats(FloodInsTakeup,                              tract_data)
s4 = sum_stats(FloodZone_Share_Dev_Area_tract,              tract_data)
s5 = sum_stats(medincome2017,                               tract_data)
s6 = sum_stats(population2017,                              tract_data)
s7 = sum_stats(ptg_white2017,                               tract_data)
s8 = sum_stats(ptg_OwnerOccupied2017,                       tract_data)
s9 = sum_stats(ptg_Mortgage2017,                            tract_data)
s10= sum_stats(ptg_Bachelor2017,                            tract_data)
s11= sum_stats(Gini2017,                                    tract_data)
s12= sum_stats(ptg_Unemployed2017,                          tract_data)
s13= sum_stats(ptg_SavingsAccount2017,                      tract_data)


s14= sum_stats(FloodDepth_ft_location,                      FL_business)
s15= sum_stats(dummy_brands,                                FL_business)

s_total = bind_rows(s1, s2, s3, s4, s5, s6,  s7, s8, s9,
                    s10, s11, s12, s13, s14, s15)


xtable(s_total)


